import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import geopandas as gpd
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
# ---------------------------------------------------------------------------
# ACS state population: 2019, 2021, 2022 from the Census ACS 1-year API;
# 2020 from a locally downloaded spreadsheet (no API call in the original).
# ---------------------------------------------------------------------------
def _fetch_acs_state_population(year):
    """Fetch total population (B01001_001E) per state from the ACS 1-year API.

    Returns a DataFrame with columns 'State' and 'Population' (values are
    strings exactly as the API returns them, commas included), plus the
    trailing state-FIPS column the API appends.
    """
    url = f'https://api.census.gov/data/{year}/acs/acs1?'
    params = {
        'get': 'NAME,B01001_001E',  # state name + total population estimate
        'for': 'state:*',           # every state
    }
    rows = requests.get(url, params=params).json()
    # The first row of the JSON response is the header row; drop it.
    df = pd.DataFrame(rows).drop(0).fillna(0)
    return df.rename(columns={0: 'State', 1: 'Population'})

pop_2022 = _fetch_acs_state_population(2022)
pop_2021 = _fetch_acs_state_population(2021)
pop_2019 = _fetch_acs_state_population(2019)

# 2020: read from a downloaded Census workbook. State names sit on sheet row 6
# and population values on row 8, interleaved in every other column.
# NOTE(review): these offsets are specific to this workbook's layout — confirm
# if the source file changes.
df = pd.read_excel("C:/Users/thoma/Downloads/XK200101.xlsx")
state_names = df.iloc[5].tolist()[1::2]
populations = df.iloc[7].tolist()[1::2]
df_new = pd.DataFrame({'State': state_names, 'Population': populations})
pop_2020 = df_new
# Join the four yearly population tables on state name (inner join, the
# pandas default). The first merge suffixes its overlapping columns right
# away; the 2020 table's 'Population' column stays unsuffixed until the
# final merge disambiguates it against 2019's.
merged_pop = (
    pop_2022
    .merge(pop_2021, on='State', suffixes=('_2022', '_2021'))
    .merge(pop_2020, on='State')
    .merge(pop_2019, on='State', suffixes=('_2020', '_2019'))
)
# ---------------------------------------------------------------------------
# BLS QCEW annual average employment per state. The table-maker page renders
# its table client-side with JavaScript, hence Selenium rather than requests.
# ---------------------------------------------------------------------------
def _scrape_bls_state_employment(year):
    """Scrape annual average employment per state for *year* from the BLS
    QCEW table maker.

    Returns a DataFrame with columns 'State' and f'{year} Employment'
    (values are comma-formatted strings as shown on the page).
    """
    url = (
        "https://data.bls.gov/cew/apps/table_maker/v4/table_maker.htm"
        f"#type=0&year={year}&qtr=A&own=0&ind=10&supp=0"
    )
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        # Wait (up to 10s) for the JS-rendered table rows to appear before
        # snapshotting the page source.
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, '.verdana.md tbody tr')
            )
        )
        html_content = driver.page_source
    finally:
        # BUGFIX: the original leaked the browser process if the wait timed
        # out; always quit the driver.
        driver.quit()

    soup = BeautifulSoup(html_content, "html.parser")
    records = []
    for row in soup.select('.verdana.md tbody tr'):
        cells = row.find_all(['th', 'td'])
        if len(cells) >= 3:
            records.append({
                'State': cells[0].get_text(strip=True),
                # The third cell holds the annual average employment figure.
                f'{year} Employment': cells[2].get_text(strip=True),
            })
    return pd.DataFrame(records)

df_2019 = _scrape_bls_state_employment(2019)
df_2020 = _scrape_bls_state_employment(2020)
df_2021 = _scrape_bls_state_employment(2021)
df_2022 = _scrape_bls_state_employment(2022)
# ---------------------------------------------------------------------------
# Combine the yearly BLS employment tables with ACS population and derive a
# per-year rate. NOTE(review): (population - employment) / population is NOT
# the official unemployment rate (that uses the labor force, not total
# population) — kept as the author defined it.
# ---------------------------------------------------------------------------
merged_df = pd.merge(df_2019, df_2020, on='State', how='inner', suffixes=('_2019', '_2020'))
merged_df = pd.merge(merged_df, df_2021, on='State', how='inner', suffixes=('_2020', '_2021'))
merged_df = pd.merge(merged_df, df_2022, on='State', how='inner', suffixes=('_2021', '_2022'))
merged_df.columns = ["State", "2019", "2020", "2021", "2022"]
# Keep the first 52 rows (presumably 50 states + DC + PR — confirm against
# the scraped table), then drop row 0 (presumably the national total row).
merged_df = merged_df.head(52)
merged_df = merged_df.drop(0)

m_df = pd.merge(merged_df, merged_pop, on='State', how='left')

_YEARS = ('2022', '2021', '2020', '2019')

# Strip thousands separators and convert both counts to integers, one pass
# per year instead of eight copy-pasted lines.
for _year in _YEARS:
    m_df[f'Population_{_year}'] = (
        m_df[f'Population_{_year}'].str.replace(',', '').astype(int)
    )
    m_df[_year] = m_df[_year].str.replace(',', '').astype(int)

# Derived rate per year: share of the total population not counted as
# employed, in percent.
for _year in _YEARS:
    m_df[f'Unemployment Rate {_year}'] = (
        (m_df[f'Population_{_year}'] - m_df[_year])
        / m_df[f'Population_{_year}'] * 100
    )
# ---------------------------------------------------------------------------
# Interactive choropleth: change in the derived unemployment rate by state.
# ---------------------------------------------------------------------------
us_states_geojson_path = 'C:/Users/thoma/Downloads/gz_2010_us_040_00_500k.json'
gdf_states = gpd.read_file(us_states_geojson_path)
gdf_states.rename(columns={'NAME': 'State'}, inplace=True)

# Left-join keeps every state polygon even when metrics are missing.
merged_df = pd.merge(gdf_states, m_df, on='State', how='left')

# BUGFIX: the original colored the map by the change in raw EMPLOYMENT counts
# ((2022 - 2019) / 2019 * 100) while the title claimed "Unemployment Rate
# Change", and the 'Unemployment Rate {year}' columns computed above went
# unused. Color by the rate delta (percentage points, 2019 -> 2022) instead.
merged_df['Rate Change'] = (
    merged_df['Unemployment Rate 2022'] - merged_df['Unemployment Rate 2019']
)

fig = px.choropleth(
    merged_df,
    geojson=merged_df.geometry,
    # Use the merged frame's own index so locations align one-to-one with
    # the geometries passed above.
    locations=merged_df.index,
    color='Rate Change',
    hover_name='State',
    hover_data=['2019', '2020', '2021', '2022'],
    title='Unemployment Rate Change (2019-2022) by State',
    color_continuous_scale="Viridis",
    scope="usa",
)
fig.show()